﻿/*  
cd /projects/hsieh_project/proj_201809/code_1_data/
qsas data_1_sales_ind_sum.sas 8 &
*/

libname hr "/projects/hsieh_project/proj_201809/data/";

%Let dir_out = /projects/hsieh_project/proj_201809/data/;

/*
================================================================================
Load raw data
*/

%include "/projects/hsieh_project/proj_201809/code_1_data/m_read_sales.sas" /source2;

%m_read_sales(param_dev=0, param_lyear=0, param_czone=1, param_msa1983=1, param_msacz=1, param_drop=1);

/*
================================================================================
Calculate Concentration
================================================================================
*/

%macro ind_sum(dt_in= , dt_out= , l_perc= );

%put Top Industry Firms;
%put Input &dt_in.;
%put Output &dt_out.;
%put Percentile &l_perc.;

/* Drop emp-based perc and rank. Rename the sales-based perc and rank. */
data dt_proc;
  set &dt_in.(keep=year ch_ind sector firmnum lbdid sales sales_indf_rank sales_indf_perc runif);
  rename sales_indf_rank=indf_rank sales_indf_perc=indf_perc;
run;

/*
--------------------------------------------------------------------------------
Aggregate to year-industry-firm level
*/
proc sort data=dt_proc;
  by year sector ch_ind firmnum indf_rank indf_perc;
run;

proc means data=dt_proc noprint;
  by year sector ch_ind firmnum indf_rank indf_perc;
  output out=dt_indf(drop=_type_ _freq_) sum(sales)=sales_ind mean(runif)=runif;
run;

data dt_indf;
  set dt_indf;
  n_ind=1;
  sales2_indf = sales_ind**2; /* For HHI */
run;

/*
--------------------------------------------------------------------------------
Create variables related to top firms
e.g. sales of top 10% industry-firms (percentile based on sales and defined at industry level)
*/

%Let l_ind_var=sales_ind n_ind sales2_indf;

%Let i_list = 1;
%do %while (%scan(%bquote(&l_perc), &i_list) ~= );
  %Let i_perc=%scan(%bquote(&l_perc), &i_list.);
  %put Top &i_perc.% Industry Firms;
  
  data dt_indf;
    set dt_indf;
    sales_ind_&i_perc. = 0;
    n_ind_&i_perc. = 0;
  run;
  
  data dt_indf;
    set dt_indf;
    if indf_perc <= %sysevalf(&i_perc. / 100) then 
      do;
	sales_ind_&i_perc. = sales_ind;
	n_ind_&i_perc. = 1;
      end;
  run;
  
  %Let l_ind_var = &l_ind_var. sales_ind_&i_perc. n_ind_&i_perc.;
  %Let i_list = %eval(&i_list + 1);
%end;

%put List of Variables for Top Firms;
%put &l_ind_var.;

/*
--------------------------------------------------------------------------------
Aggregate to year-industry level
*/

proc sort data=dt_indf; by year sector ch_ind;

proc means data=dt_indf noprint;
  by year sector ch_ind;
  output out=&dt_out.(drop=_type_ _freq_) sum(&l_ind_var.)=&l_ind_var.;
run;

data &dt_out.;
  set &dt_out.;
  sales_hhi_ind = sales2_indf / (sales_ind**2);
run;

%mend;

/*
================================================================================
Calculate statistics at industry level, with reference to city
1) Total numbers of markets by industry: count number of markets for each ind-firm, and sum across ind-firms by industry.
2) Average market size by industry: calculate the average market size for each ind-firm, and take average across ind-firms by industry.
  2.1) Market size defined as total employment in city.
*/

%macro ind_sum_city(dt_in= , dt_out= , c_city= , l_perc= );

%put Top Industry Firms;
%put Input &dt_in.;
%put Output &dt_out.;
%put City &c_city.;
%put Percentile &l_perc.;

data dt_proc;
  set &dt_in.(keep=year &c_city. ch_ind sector firmnum lbdid sales sales_indf_rank sales_indf_perc runif);
  rename sales_indf_rank=indf_rank sales_indf_perc=indf_perc;
run;

/* 
--------------------------------------------------------------------------------
Aggregate to year-industry-firm-city level
*/

proc sort data=dt_proc;
  by year sector ch_ind firmnum indf_rank indf_perc &c_city.;
run;

proc means data=dt_proc noprint;
  by year sector ch_ind firmnum indf_rank indf_perc &c_city.;
  output out=dt_indf(drop=_type_ _freq_) sum(sales)=sales mean(runif)=runif;
run;

/*
--------------------------------------------------------------------------------
Numbers of market served by firms
*/

/* Count total number of cities for all firms */
data dt_indf;
  set dt_indf;
  ind_&c_city._tot=1;
run;

/* Count total number of cities for top firms */
%Let l_ind_var=ind_&c_city._tot;

%Let i_list = 1;
%do %while (%scan(%bquote(&l_perc), &i_list) ~= );
  %Let i_perc=%scan(%bquote(&l_perc), &i_list.);
  %put Top &i_perc.% Industry Firms;
  
  data dt_indf;
    set dt_indf;
    ind_&c_city._tot_&i_perc.=0;
    
    if indf_perc <= %sysevalf(&i_perc. / 100) then 
      do;
	ind_&c_city._tot_&i_perc.= 1;
      end;
  run;
  
  %Let l_ind_var = &l_ind_var. ind_&c_city._tot_&i_perc.;
  %Let i_list = %eval(&i_list + 1);
%end;

%put List of Variables for Top Firms;
%put &l_ind_var.;


/*
----------------------------------------
Aggregate to year-industry level
*/

proc sort data=dt_indf; by year sector ch_ind;

proc means data=dt_indf noprint;
  by year sector ch_ind;
  output out=&dt_out.(drop=_type_ _freq_) sum(&l_ind_var.)=&l_ind_var.;
run;

%mend;

/*
================================================================================
Main
*/

%ind_sum(dt_in=lbd, dt_out=lbd_ind, l_perc=1 10 20 30 40 50 60 70 80 90);
%ind_sum_city(dt_in=lbd, dt_out=lbd_ind_fips, c_city=fips, l_perc=1 10 20 30 40 50 60 70 80 90);
%ind_sum_city(dt_in=lbd, dt_out=lbd_ind_msa, c_city=msa, l_perc=1 10 20 30 40 50 60 70 80 90);
%ind_sum_city(dt_in=lbd, dt_out=lbd_ind_czone, c_city=czone, l_perc=1 10 20 30 40 50 60 70 80 90);
%ind_sum_city(dt_in=lbd, dt_out=lbd_ind_msa1983, c_city=msa1983, l_perc=1 10 20 30 40 50 60 70 80 90);
%ind_sum_city(dt_in=lbd, dt_out=lbd_ind_msa1983cz, c_city=msa1983cz, l_perc=1 10 20 30 40 50 60 70 80 90);

data lbd;
   set lbd;
   if zipcode = "" then delete;
   zipcode = substr(zipcode,1,5);
run;
%ind_sum_city(dt_in=lbd, dt_out=lbd_ind_zip, c_city=zipcode, l_perc=1 10 20 30 40 50 60 70 80 90);

/* Drop the automatically generated unwanted variables (already dropped) */

/*
--------------------------------------------------------------------------------
Export
*/

data lbd_ind;
  merge lbd_ind lbd_ind_zip lbd_ind_fips lbd_ind_msa lbd_ind_czone lbd_ind_msa1983 lbd_ind_msa1983cz;
  by year sector ch_ind;
run;

proc export data=lbd_ind outfile="&dir_out./sales_ind_sum.dta" replace;
run;

/* End of SAS file */
